% Mnih et al. (2015), journal article in Nature, vol. 518, no. 7540.
% Cite key: DQN. Record exported from dblp; the timestamp, biburl and
% bibsource fields are dblp bookkeeping and are not standard BibTeX fields.
@article{DQN,
  author    = {Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and
               Rusu, Andrei A. and Veness, Joel and Bellemare, Marc G. and
               Graves, Alex and Riedmiller, Martin A. and Fidjeland, Andreas and
               Ostrovski, Georg and Petersen, Stig and Beattie, Charles and
               Sadik, Amir and Antonoglou, Ioannis and King, Helen and
               Kumaran, Dharshan and Wierstra, Daan and Legg, Shane and
               Hassabis, Demis},
  title     = {Human-level control through deep reinforcement learning},
  journal   = {Nature},
  year      = {2015},
  volume    = {518},
  number    = {7540},
  pages     = {529--533},
  doi       = {10.1038/nature14236},
  url       = {https://doi.org/10.1038/nature14236},
  timestamp = {Wed, 14 Nov 2018 10:30:43 +0100},
  biburl    = {https://dblp.org/rec/journals/nature/MnihKSRVBGRFOPB15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Lillicrap et al., conference paper in the ICLR 2016 proceedings.
% Cite key: DDPG. Record exported from dblp; the timestamp, biburl and
% bibsource fields are dblp bookkeeping and are not standard BibTeX fields.
% The eprint identifier is the one embedded in the arXiv URL of this entry.
@inproceedings{DDPG,
  author    = {Lillicrap, Timothy P. and
               Hunt, Jonathan J. and
               Pritzel, Alexander and
               Heess, Nicolas and
               Erez, Tom and
               Tassa, Yuval and
               Silver, David and
               Wierstra, Daan},
  title     = {Continuous control with deep reinforcement learning},
  booktitle = {4th International Conference on Learning Representations, {ICLR} 2016,
               San Juan, Puerto Rico, May 2--4, 2016, Conference Track Proceedings},
  year      = {2016},
  url       = {http://arxiv.org/abs/1509.02971},
  archivePrefix = {arXiv},
  eprint    = {1509.02971},
  timestamp = {Thu, 25 Jul 2019 14:25:37 +0200},
  biburl    = {https://dblp.org/rec/journals/corr/LillicrapHPHETS15.bib},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}

% Schulman et al. (2017), arXiv preprint 1707.06347, recorded in the dblp
% convention of journal CoRR with the arXiv identifier as the volume.
% Cite key: PPO. The timestamp, biburl and bibsource fields are dblp
% bookkeeping and are not standard BibTeX fields.
@article{PPO,
  author        = {Schulman, John and Wolski, Filip and Dhariwal, Prafulla and
                   Radford, Alec and Klimov, Oleg},
  title         = {Proximal Policy Optimization Algorithms},
  journal       = {CoRR},
  year          = {2017},
  volume        = {abs/1707.06347},
  eprint        = {1707.06347},
  archivePrefix = {arXiv},
  url           = {http://arxiv.org/abs/1707.06347},
  timestamp     = {Mon, 13 Aug 2018 16:47:34 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SchulmanWDRK17.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
